<!DOCTYPE HTML>
<html lang="zh-CN">


<head>
    <!-- Document metadata. charset comes first so the parser sees it early. -->
    <meta charset="utf-8">
    <meta name="keywords" content="DQN practice-maze">
    <meta name="description" content="DQN practice maze">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <!-- Zoom is deliberately left enabled: user-scalable=no prevents pinch-zoom for low-vision readers. -->
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta name="renderer" content="webkit|ie-stand|ie-comp">
    <meta name="mobile-web-app-capable" content="yes">
    <meta name="format-detection" content="telephone=no">
    <meta name="apple-mobile-web-app-capable" content="yes">
    <meta name="apple-mobile-web-app-status-bar-style" content="black-translucent">
    <meta name="referrer" content="no-referrer-when-downgrade">
    <!-- Global site tag (gtag.js) - Google Analytics -->

    <title>DQN practice-maze | B-612</title>
    <link rel="icon" type="image/png" href="/favicon.png">

    <!-- bg-cover style     -->

    <!-- Library and theme stylesheets. Order is preserved because later sheets override earlier ones. -->
    <link rel="stylesheet" type="text/css" href="/libs/awesome/css/all.min.css">
    <link rel="stylesheet" type="text/css" href="/libs/materialize/materialize.min.css">
    <link rel="stylesheet" type="text/css" href="/libs/aos/aos.css">
    <link rel="stylesheet" type="text/css" href="/libs/animate/animate.min.css">
    <link rel="stylesheet" type="text/css" href="/libs/lightGallery/css/lightgallery.min.css">
    <link rel="stylesheet" type="text/css" href="/css/matery.css">
    <link rel="stylesheet" type="text/css" href="/css/my.css">
    <!-- dark.css is requested with media="none" and switched to media="all" by its onload handler. -->
    <link rel="stylesheet" type="text/css" href="/css/dark.css" media="none" onload="if(media!='all')media='all'">

    <!-- Post-page stylesheets -->
    <link rel="stylesheet" href="/libs/tocbot/tocbot.css">
    <link rel="stylesheet" href="/css/post.css">
    <link rel="stylesheet" type="text/css" href="/css/reward.css">

    <script src="/libs/jquery/jquery-3.6.0.min.js"></script>

<meta name="generator" content="Hexo 6.3.0">
<style>.github-emoji { position: relative; display: inline-block; width: 1.2em; min-height: 1.2em; overflow: hidden; vertical-align: top; color: transparent; }  .github-emoji > span { position: relative; z-index: 10; }  .github-emoji img, .github-emoji .fancybox { margin: 0 !important; padding: 0 !important; border: none !important; outline: none !important; text-decoration: none !important; user-select: none !important; cursor: auto !important; }  .github-emoji img { height: 1.2em !important; width: 1.2em !important; position: absolute !important; left: 50% !important; top: 50% !important; transform: translate(-50%, -50%) !important; user-select: none !important; cursor: auto !important; } .github-emoji-fallback { color: inherit; } .github-emoji-fallback img { opacity: 0 !important; }</style>
<link rel="alternate" href="/atom.xml" title="B-612" type="application/atom+xml">
</head>


<body>
    <header class="navbar-fixed">
    <nav id="headNav" class="bg-color nav-transparent">
        <div id="navContainer" class="nav-wrapper container">
            <div class="brand-logo">
                <!-- Site logo and name; links to the site root. -->
                <a class="waves-effect waves-light" href="/">
                    <img class="logo-img" src="/medias/logo.png" alt="LOGO">
                    <span class="logo-span">B-612</span>
                </a>
            </div>
            

<!-- Trigger for the #mobile-nav side navigation. It is icon-only, so aria-label supplies the accessible name.
     NOTE(review): the anchor and href="#" are kept because the sidenav script and CSS presumably target this exact markup. Confirm before converting to a button. -->
<a href="#" data-target="mobile-nav" class="sidenav-trigger button-collapse" aria-label="打开导航菜单"><i class="fas fa-bars" aria-hidden="true"></i></a>
<ul class="right nav-menu">
  <!-- Primary destinations. Each item carries the hide-on-med-and-down class. Icons are decorative; the adjacent span holds the label. -->
  <li class="hide-on-med-and-down nav-item">
    <a href="/" class="waves-effect waves-light">
      <i class="fas fa-home" style="zoom: 0.6;" aria-hidden="true"></i>
      <span>首页</span>
    </a>
  </li>
  <li class="hide-on-med-and-down nav-item">
    <a href="/tags" class="waves-effect waves-light">
      <i class="fas fa-tags" style="zoom: 0.6;" aria-hidden="true"></i>
      <span>标签</span>
    </a>
  </li>
  <li class="hide-on-med-and-down nav-item">
    <a href="/categories" class="waves-effect waves-light">
      <i class="fas fa-bookmark" style="zoom: 0.6;" aria-hidden="true"></i>
      <span>分类</span>
    </a>
  </li>
  <li class="hide-on-med-and-down nav-item">
    <a href="/archives" class="waves-effect waves-light">
      <i class="fas fa-archive" style="zoom: 0.6;" aria-hidden="true"></i>
      <span>归档</span>
    </a>
  </li>
  <li class="hide-on-med-and-down nav-item">
    <a href="/tips" class="waves-effect waves-light">
      <i class="fa-solid fa-wand-magic-sparkles" style="zoom: 0.6;" aria-hidden="true"></i>
      <span>附录</span>
    </a>
  </li>
  <li class="hide-on-med-and-down nav-item">
    <a href="/about" class="waves-effect waves-light">
      <i class="fas fa-user-circle" style="zoom: 0.6;" aria-hidden="true"></i>
      <span>关于</span>
    </a>
  </li>
  <li class="hide-on-med-and-down nav-item">
    <a href="/friends" class="waves-effect waves-light">
      <i class="fas fa-address-book" style="zoom: 0.6;" aria-hidden="true"></i>
      <span>友情链接</span>
    </a>
  </li>

  <!-- Icon-only controls: aria-label gives each link an accessible name, since title alone is not reliably announced. -->
  <li>
    <a href="#searchModal" class="modal-trigger waves-effect waves-light" aria-label="搜索">
      <i id="searchIcon" class="fas fa-search" title="搜索" style="zoom: 0.85;" aria-hidden="true"></i>
    </a>
  </li>
  <li>
    <!-- NOTE(review): href="javascript:;" with onclick is button behaviour. The anchor is kept because switchNightMode() and the nav styling are defined outside this block. -->
    <a href="javascript:;" class="waves-effect waves-light" onclick="switchNightMode()" title="深色/浅色模式" aria-label="深色/浅色模式">
      <i id="sum-moon-icon" class="fas fa-sun" style="zoom: 0.85;" aria-hidden="true"></i>
    </a>
  </li>
</ul>


<!-- Side navigation opened by the element whose data-target is "mobile-nav". -->
<div id="mobile-nav" class="side-nav sidenav">

    <div class="mobile-head bg-color">
        <!-- Decorative: the site name follows as text, so alt is empty rather than missing. -->
        <img src="/medias/logo.png" class="logo-img circle responsive-img" alt="">
        <div class="logo-name">B-612</div>
        <div class="logo-desc">
            Never really desperate, only the lost of the soul.
        </div>
    </div>

    <ul class="menu-list mobile-menu-list">
        <li class="m-nav-item">
            <a href="/" class="waves-effect waves-light">
                <i class="fa-fw fas fa-home" aria-hidden="true"></i>
                首页
            </a>
        </li>
        <li class="m-nav-item">
            <a href="/tags" class="waves-effect waves-light">
                <i class="fa-fw fas fa-tags" aria-hidden="true"></i>
                标签
            </a>
        </li>
        <li class="m-nav-item">
            <a href="/categories" class="waves-effect waves-light">
                <i class="fa-fw fas fa-bookmark" aria-hidden="true"></i>
                分类
            </a>
        </li>
        <li class="m-nav-item">
            <a href="/archives" class="waves-effect waves-light">
                <i class="fa-fw fas fa-archive" aria-hidden="true"></i>
                归档
            </a>
        </li>
        <li class="m-nav-item">
            <a href="/tips" class="waves-effect waves-light">
                <i class="fa-fw fa-solid fa-wand-magic-sparkles" aria-hidden="true"></i>
                附录
            </a>
        </li>
        <li class="m-nav-item">
            <a href="/about" class="waves-effect waves-light">
                <i class="fa-fw fas fa-user-circle" aria-hidden="true"></i>
                关于
            </a>
        </li>
        <li class="m-nav-item">
            <a href="/friends" class="waves-effect waves-light">
                <i class="fa-fw fas fa-address-book" aria-hidden="true"></i>
                友情链接
            </a>
        </li>

        <li><div class="divider"></div></li>
        <li>
            <!-- External link opened in a new tab; rel="noopener" matches the article's outbound links. -->
            <a href="https://github.com/wangjueya/B612-Factory" class="waves-effect waves-light" target="_blank" rel="noopener">
                <i class="fab fa-github-square fa-fw" aria-hidden="true"></i>Fork Me
            </a>
        </li>
    </ul>
</div>


        </div>

        
            <style>
    /* Styles for the "Fork Me" corner ribbon (.github-corner). */

    /* The ribbon is not shown while an ancestor has the nav-transparent class. */
    .nav-transparent .github-corner {
        display: none !important;
    }

    /* Pin the ribbon to the top-right of its positioned ancestor, slightly enlarged. */
    .github-corner {
        position: absolute;
        z-index: 10;
        top: 0;
        right: 0;
        border: 0;
        transform: scale(1.1);
    }

    /* `fill` colours paths that set no fill of their own; `color` feeds the paths that use fill="currentColor". */
    .github-corner svg {
        color: #0f9d58;
        fill: #fff;
        height: 64px;
        width: 64px;
    }

    /* Play the arm animation on hover. This selector is more specific than the reset below, so it wins while hovered. */
    .github-corner:hover .octo-arm {
        animation: a 0.56s ease-in-out;
    }

    /* No animation when not hovered. */
    .github-corner .octo-arm {
        animation: none;
    }

    /* Arm rotation: rest, -25deg, 10deg, -25deg, 10deg, rest. */
    @keyframes a {
        0%,
        to {
            transform: rotate(0);
        }
        20%,
        60% {
            transform: rotate(-25deg);
        }
        40%,
        80% {
            transform: rotate(10deg);
        }
    }
</style>

<!-- "Fork Me" corner ribbon. The SVG is aria-hidden, so aria-label provides the link's accessible name.
     rel="noopener" keeps the new tab from reaching window.opener. -->
<a href="https://github.com/wangjueya/B612-Factory" class="github-corner tooltipped hide-on-med-and-down" target="_blank" rel="noopener"
   data-tooltip="Fork Me" data-position="left" data-delay="50" aria-label="Fork Me">
    <svg viewBox="0 0 250 250" aria-hidden="true">
        <path d="M0,0 L115,115 L130,115 L142,142 L250,250 L250,0 Z"></path>
        <path d="M128.3,109.0 C113.8,99.7 119.0,89.6 119.0,89.6 C122.0,82.7 120.5,78.6 120.5,78.6 C119.2,72.0 123.4,76.3 123.4,76.3 C127.3,80.9 125.5,87.3 125.5,87.3 C122.9,97.6 130.6,101.9 134.4,103.2"
              fill="currentColor" style="transform-origin: 130px 106px;" class="octo-arm"></path>
        <path d="M115.0,115.0 C114.9,115.1 118.7,116.5 119.8,115.4 L133.7,101.6 C136.9,99.2 139.9,98.4 142.2,98.6 C133.8,88.0 127.5,74.4 143.8,58.0 C148.5,53.4 154.0,51.2 159.7,51.0 C160.3,49.4 163.2,43.6 171.4,40.1 C171.4,40.1 176.1,42.5 178.8,56.2 C183.1,58.6 187.2,61.8 190.9,65.4 C194.5,69.0 197.7,73.2 200.1,77.6 C213.8,80.2 216.3,84.9 216.3,84.9 C212.7,93.1 206.9,96.0 205.4,96.6 C205.1,102.4 203.0,107.8 198.3,112.5 C181.9,128.9 168.3,122.5 157.7,114.1 C157.9,116.9 156.7,120.9 152.7,124.9 L141.0,136.5 C139.8,137.7 141.6,141.9 141.8,141.8 Z"
              fill="currentColor" class="octo-body"></path>
    </svg>
</a>
        
    </nav>

</header>

    



<!-- Post banner: the cover image is set as an inline background; the post title is the page's h1. -->
<div class="bg-cover pd-header post-cover" style="background-image: url('/medias/featureimages/18.jpg')">
    <div class="container" style="right: 0px;left: 0px;">
        <div class="row">
            <div class="col s12 m12 l12">
                <div class="brand">
                    <h1 class="description center-align post-title">DQN practice-maze</h1>
                </div>
            </div>
        </div>
    </div>
</div>




<main class="post-container content">

    
    <div class="row">
    <div id="main-content" class="col s12 m12 l9">
        <!-- 文章内容详情 -->
<div id="artDetail">
    <div class="card">
        <div class="card-content article-info">
            <!-- Tag chips (left column) and category links (right column). -->
            <div class="row tag-cate">
                <div class="col s7">
                    <div class="article-tag">
                        <a href="/tags/DQN/">
                            <span class="chip bg-color">DQN</span>
                        </a>
                    </div>
                </div>
                <div class="col s5 right-align">
                    <div class="post-cate">
                        <i class="fas fa-bookmark fa-fw icon-category" aria-hidden="true"></i>
                        <a href="/categories/05-Program-Design/" class="post-category">
                            05-Program-Design
                        </a>
                        <a href="/categories/05-Program-Design/Algorithm/" class="post-category">
                            Algorithm
                        </a>
                        <a href="/categories/05-Program-Design/Algorithm/DQN/" class="post-category">
                            DQN
                        </a>
                    </div>
                </div>
            </div>

            <!-- Post metadata. Icons are decorative; dates are wrapped in <time> so they are machine-readable. -->
            <div class="post-info">
                <div class="post-date info-break-policy">
                    <i class="far fa-calendar-minus fa-fw" aria-hidden="true"></i>发布日期:&nbsp;&nbsp;
                    <time datetime="2023-12-30">2023-12-30</time>
                </div>

                <div class="post-date info-break-policy">
                    <i class="far fa-calendar-check fa-fw" aria-hidden="true"></i>更新日期:&nbsp;&nbsp;
                    <time datetime="2024-01-01">2024-01-01</time>
                </div>

                <div class="info-break-policy">
                    <i class="far fa-file-word fa-fw" aria-hidden="true"></i>文章字数:&nbsp;&nbsp;
                    1.9k
                </div>

                <div class="info-break-policy">
                    <i class="far fa-clock fa-fw" aria-hidden="true"></i>阅读时长:&nbsp;&nbsp;
                    8 分
                </div>

                <!-- The ids below are kept unchanged. NOTE(review): the empty span is presumably filled by a page-view counter script loaded elsewhere. -->
                <div id="busuanzi_container_page_pv" class="info-break-policy">
                    <i class="far fa-eye fa-fw" aria-hidden="true"></i>阅读次数:&nbsp;&nbsp;
                    <span id="busuanzi_value_page_pv"></span>
                </div>

                <!-- Icon-only link to the post's source on GitHub; aria-label supplies the accessible name. -->
                <div class="info-break-policy" style="margin-left: 3px">
                    <a target="_blank" rel="noopener" href="https://github.com/WANGJUEYA/magic-book/edit/master/05-Program-Design/Algorithm/DQN/practice-maze.md" aria-label="在 GitHub 上编辑本文">
                        <i class="far fa-regular fa-pen-to-square" aria-hidden="true"></i>
                    </a>
                </div>
            </div>
        </div>
        <hr class="clearfix">

        
        <!-- 是否加载使用自带的 prismjs. -->
        <link rel="stylesheet" href="/libs/prism/prism.min.css">
        

        

        <div class="card-content article-card-content">
            <div id="articleContent">
                <h2 id="系统环境">系统环境</h2>
<ul>
<li>windows + CUDA=11.6</li>
<li>miniconda3</li>
<li>python=<a target="_blank" rel="noopener" href="https://www.python.org/downloads/">3.7.12</a>
<ul>
<li>conda create -n dqn python=3.7.12</li>
</ul>
</li>
<li>pytorch=<a target="_blank" rel="noopener" href="https://pytorch.org/get-started/previous-versions/">1.7.1</a>  # numpy==1.21.6
<ul>
<li>conda install pytorch==1.7.1 cudatoolkit=11.0 -c pytorch</li>
<li>pip install "torch-1.7.1+cpu-cp37-cp37m-win_amd64.whl" # <a target="_blank" rel="noopener" href="https://download.pytorch.org/whl/torch/">https://download.pytorch.org/whl/torch/</a></li>
</ul>
</li>
<li>conda install gym[classic_control] # gym=0.21.0</li>
<li>conda install pygame==2.1.0 tensorboard=2.11.2 tensorboardX==2.5.1 pyglet==1.5.27</li>
</ul>
<h2 id="附录">附录</h2>
<h3 id="项目地址">项目地址</h3>
<pre class="line-numbers language-bash" data-language="bash"><code class="language-bash"><span class="token function">git</span> clone git@github.com:WANGJUEYA/DQN.git<span aria-hidden="true" class="line-numbers-rows"><span></span></span></code></pre>
<h3 id="参考资料">参考资料</h3>
<ul>
<li><a target="_blank" rel="noopener" href="https://www.cs.toronto.edu/~vmnih/docs/dqn.pdf">《Playing Atari with Deep Reinforcement Learning》</a></li>
<li><a target="_blank" rel="noopener" href="https://storage.googleapis.com/deepmind-media/dqn/DQNNaturePaper.pdf">《Human-level Control through Deep Reinforcement Learning：Nature杂志》</a></li>
<li><a target="_blank" rel="noopener" href="https://zhuanlan.zhihu.com/p/97856004">DQN(Deep Q-learning)算法原理与实现</a></li>
<li><a target="_blank" rel="noopener" href="https://www.manning.com/books/deep-reinforcement-learning-in-action">《深度强化学习实战》</a></li>
<li><a target="_blank" rel="noopener" href="https://zhuanlan.zhihu.com/p/630554489">DQN基本概念和算法流程</a></li>
</ul>
<h3 id="CartPole"><a href="CartPole.py">CartPole</a></h3>
<pre class="line-numbers language-bash" data-language="bash"><code class="language-bash">python CartPole.py
tensorboard <span class="token parameter variable">--logdir</span><span class="token operator">=</span><span class="token string">"run/MemoryCapacity_100_CustomReward/"</span><span aria-hidden="true" class="line-numbers-rows"><span></span><span></span></span></code></pre>
<pre class="line-numbers language-python" data-language="python"><code class="language-python"><span class="token keyword">import</span> gym
<span class="token keyword">import</span> numpy <span class="token keyword">as</span> np
<span class="token keyword">import</span> torch
<span class="token keyword">import</span> torch<span class="token punctuation">.</span>nn <span class="token keyword">as</span> nn
<span class="token keyword">import</span> torch<span class="token punctuation">.</span>nn<span class="token punctuation">.</span>functional <span class="token keyword">as</span> F
<span class="token keyword">from</span> torch<span class="token punctuation">.</span>utils<span class="token punctuation">.</span>tensorboard <span class="token keyword">import</span> SummaryWriter

<span class="token comment"># gym=0.26.0 https://blog.csdn.net/qq_43674552/article/details/127344366</span>

<span class="token comment"># Hyper Parameters 超参数</span>
EPOCH <span class="token operator">=</span> <span class="token number">400</span>  <span class="token comment"># 400个episode循环</span>
BATCH_SIZE <span class="token operator">=</span> <span class="token number">32</span>  <span class="token comment"># 样本数量</span>
LR <span class="token operator">=</span> <span class="token number">0.01</span>  <span class="token comment"># learning rate | 学习率</span>
EPSILON <span class="token operator">=</span> <span class="token number">0.9</span>  <span class="token comment"># greedy policy</span>
GAMMA <span class="token operator">=</span> <span class="token number">0.9</span>  <span class="token comment"># reward discount</span>
TARGET_REPLACE_ITER <span class="token operator">=</span> <span class="token number">100</span>  <span class="token comment"># target update frequency | 目标网络更新频率</span>
MEMORY_CAPACITY <span class="token operator">=</span> <span class="token number">2000</span>  <span class="token comment"># 记忆库容量</span>
env <span class="token operator">=</span> gym<span class="token punctuation">.</span>make<span class="token punctuation">(</span><span class="token string">'CartPole-v0'</span><span class="token punctuation">)</span>  <span class="token comment"># 使用gym库中的环境：CartPole，且打开封装</span>
env <span class="token operator">=</span> env<span class="token punctuation">.</span>unwrapped  <span class="token comment"># 打开环境封装</span>
N_ACTIONS <span class="token operator">=</span> env<span class="token punctuation">.</span>action_space<span class="token punctuation">.</span>n  <span class="token comment"># 杆子动作个数 (2个)</span>
N_STATES <span class="token operator">=</span> env<span class="token punctuation">.</span>observation_space<span class="token punctuation">.</span>shape<span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span>  <span class="token comment"># 杆子状态个数 (4个)</span>

<span class="token triple-quoted-string string">"""
torch.nn是专门为神经网络设计的模块化接口。nn构建于Autograd之上，可以用来定义和运行神经网络。
nn.Module是nn中十分重要的类，包含网络各层的定义及forward方法。
定义网络：
    需要继承nn.Module类，并实现forward方法。
    一般把网络中具有可学习参数的层放在构造函数__init__()中。
    只要在nn.Module的子类中定义了forward函数，backward函数就会被自动实现(利用Autograd)。
"""</span>


<span class="token comment"># 定义Net类 (定义网络)</span>
<span class="token keyword">class</span> <span class="token class-name">Net</span><span class="token punctuation">(</span>nn<span class="token punctuation">.</span>Module<span class="token punctuation">)</span><span class="token punctuation">:</span>

    <span class="token keyword">def</span> <span class="token function">__init__</span><span class="token punctuation">(</span>self<span class="token punctuation">)</span><span class="token punctuation">:</span>  <span class="token comment"># 定义Net的一系列属性</span>
        <span class="token comment"># nn.Module的子类函数必须在构造函数中执行父类的构造函数</span>
        <span class="token builtin">super</span><span class="token punctuation">(</span>Net<span class="token punctuation">,</span> self<span class="token punctuation">)</span><span class="token punctuation">.</span>__init__<span class="token punctuation">(</span><span class="token punctuation">)</span>  <span class="token comment"># 等价于nn.Module.__init__()</span>
        self<span class="token punctuation">.</span>fc1 <span class="token operator">=</span> nn<span class="token punctuation">.</span>Linear<span class="token punctuation">(</span>N_STATES<span class="token punctuation">,</span> <span class="token number">20</span><span class="token punctuation">)</span>  <span class="token comment"># 设置第一个全连接层(输入层到隐藏层): 状态数个神经元到20个神经元</span>
        self<span class="token punctuation">.</span>fc1<span class="token punctuation">.</span>weight<span class="token punctuation">.</span>data<span class="token punctuation">.</span>normal_<span class="token punctuation">(</span><span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">0.1</span><span class="token punctuation">)</span>  <span class="token comment"># 权重初始化 (均值为0，标准差为0.1的正态分布)</span>
        self<span class="token punctuation">.</span>fc2 <span class="token operator">=</span> nn<span class="token punctuation">.</span>Linear<span class="token punctuation">(</span><span class="token number">20</span><span class="token punctuation">,</span> N_ACTIONS<span class="token punctuation">)</span>  <span class="token comment"># 设置第二个全连接层(隐藏层到输出层): 20个神经元到动作数个神经元</span>
        self<span class="token punctuation">.</span>fc2<span class="token punctuation">.</span>weight<span class="token punctuation">.</span>data<span class="token punctuation">.</span>normal_<span class="token punctuation">(</span><span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">0.1</span><span class="token punctuation">)</span>  <span class="token comment"># 权重初始化 (均值为0，标准差为0.1的正态分布)</span>

    <span class="token keyword">def</span> <span class="token function">forward</span><span class="token punctuation">(</span>self<span class="token punctuation">,</span> x<span class="token punctuation">)</span><span class="token punctuation">:</span>  <span class="token comment"># 定义forward函数 (x为状态)</span>
        x <span class="token operator">=</span> F<span class="token punctuation">.</span>relu<span class="token punctuation">(</span>self<span class="token punctuation">.</span>fc1<span class="token punctuation">(</span>x<span class="token punctuation">)</span><span class="token punctuation">)</span>  <span class="token comment"># 连接输入层到隐藏层，且使用激励函数ReLU来处理经过隐藏层后的值</span>
        <span class="token keyword">return</span> self<span class="token punctuation">.</span>fc2<span class="token punctuation">(</span>x<span class="token punctuation">)</span>  <span class="token comment"># 连接隐藏层到输出层，获得最终的输出值 (即动作值)</span>


<span class="token comment"># 定义DQN类 (定义两个网络)</span>
<span class="token keyword">class</span> <span class="token class-name">DQN</span><span class="token punctuation">(</span><span class="token builtin">object</span><span class="token punctuation">)</span><span class="token punctuation">:</span>

    <span class="token keyword">def</span> <span class="token function">__init__</span><span class="token punctuation">(</span>self<span class="token punctuation">)</span><span class="token punctuation">:</span>  <span class="token comment"># 定义DQN的一系列属性</span>
        self<span class="token punctuation">.</span>target_net<span class="token punctuation">,</span> self<span class="token punctuation">.</span>evaluate_net <span class="token operator">=</span> Net<span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">,</span> Net<span class="token punctuation">(</span><span class="token punctuation">)</span>  <span class="token comment"># 利用Net创建两个神经网络: 评估网络和目标网络</span>
        self<span class="token punctuation">.</span>memory <span class="token operator">=</span> np<span class="token punctuation">.</span>zeros<span class="token punctuation">(</span><span class="token punctuation">(</span>MEMORY_CAPACITY<span class="token punctuation">,</span> N_STATES <span class="token operator">*</span> <span class="token number">2</span> <span class="token operator">+</span> <span class="token number">2</span><span class="token punctuation">)</span><span class="token punctuation">)</span>  <span class="token comment"># 初始化记忆库，一行代表一个transition</span>
        self<span class="token punctuation">.</span>loss_Function <span class="token operator">=</span> nn<span class="token punctuation">.</span>MSELoss<span class="token punctuation">(</span><span class="token punctuation">)</span>  <span class="token comment"># 使用均方损失函数 (loss(xi, yi)=(xi-yi)^2)</span>
        self<span class="token punctuation">.</span>optimizer <span class="token operator">=</span> torch<span class="token punctuation">.</span>optim<span class="token punctuation">.</span>Adam<span class="token punctuation">(</span>self<span class="token punctuation">.</span>evaluate_net<span class="token punctuation">.</span>parameters<span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">,</span> lr<span class="token operator">=</span>LR<span class="token punctuation">)</span>  <span class="token comment"># 使用Adam优化器 (输入为评估网络的参数和学习率)</span>
        self<span class="token punctuation">.</span>point <span class="token operator">=</span> <span class="token number">0</span>  <span class="token comment"># for storing memory</span>
        self<span class="token punctuation">.</span>learn_step <span class="token operator">=</span> <span class="token number">0</span>  <span class="token comment"># for target updating</span>

    <span class="token keyword">def</span> <span class="token function">choose_action</span><span class="token punctuation">(</span>self<span class="token punctuation">,</span> s<span class="token punctuation">)</span><span class="token punctuation">:</span>  <span class="token comment"># 定义动作选择函数 (s为状态)</span>
        s <span class="token operator">=</span> torch<span class="token punctuation">.</span>unsqueeze<span class="token punctuation">(</span>torch<span class="token punctuation">.</span>FloatTensor<span class="token punctuation">(</span>s<span class="token punctuation">)</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">)</span>  <span class="token comment"># 将s转换成32-bit floating point形式，并在dim=0增加维数为1的维度</span>
        <span class="token keyword">if</span> np<span class="token punctuation">.</span>random<span class="token punctuation">.</span>uniform<span class="token punctuation">(</span><span class="token punctuation">)</span> <span class="token operator">&lt;</span> EPSILON<span class="token punctuation">:</span>  <span class="token comment"># epsilon-greedy 生成一个在[0, 1)内的随机数，如果小于EPSILON，选择最优动作</span>
            <span class="token keyword">return</span> torch<span class="token punctuation">.</span><span class="token builtin">max</span><span class="token punctuation">(</span>self<span class="token punctuation">.</span>evaluate_net<span class="token punctuation">.</span>forward<span class="token punctuation">(</span>s<span class="token punctuation">)</span><span class="token punctuation">,</span> <span class="token number">1</span><span class="token punctuation">)</span><span class="token punctuation">[</span><span class="token number">1</span><span class="token punctuation">]</span><span class="token punctuation">.</span>data<span class="token punctuation">.</span>numpy<span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span>  <span class="token comment"># 通过对评估网络输入状态s，前向传播获得动作值</span>
        <span class="token keyword">else</span><span class="token punctuation">:</span>  <span class="token comment"># 随机选择动作</span>
            <span class="token keyword">return</span> np<span class="token punctuation">.</span>random<span class="token punctuation">.</span>randint<span class="token punctuation">(</span><span class="token number">0</span><span class="token punctuation">,</span> N_ACTIONS<span class="token punctuation">)</span>  <span class="token comment"># 这里action随机等于0或1 (N_ACTIONS = 2)</span>

    <span class="token keyword">def</span> <span class="token function">store_transition</span><span class="token punctuation">(</span>self<span class="token punctuation">,</span> s<span class="token punctuation">,</span> a<span class="token punctuation">,</span> r<span class="token punctuation">,</span> s_<span class="token punctuation">)</span><span class="token punctuation">:</span>  <span class="token comment"># 定义记忆存储函数 (这里输入为一个transition)</span>
        self<span class="token punctuation">.</span>memory<span class="token punctuation">[</span>self<span class="token punctuation">.</span>point <span class="token operator">%</span> MEMORY_CAPACITY<span class="token punctuation">,</span> <span class="token punctuation">:</span><span class="token punctuation">]</span> <span class="token operator">=</span> np<span class="token punctuation">.</span>hstack<span class="token punctuation">(</span><span class="token punctuation">(</span>s<span class="token punctuation">,</span> <span class="token punctuation">[</span>a<span class="token punctuation">,</span> r<span class="token punctuation">]</span><span class="token punctuation">,</span> s_<span class="token punctuation">)</span><span class="token punctuation">)</span>  <span class="token comment"># 如果记忆库满了，便覆盖旧的数据</span>
        self<span class="token punctuation">.</span>point <span class="token operator">+=</span> <span class="token number">1</span>  <span class="token comment"># point自加1 (记忆库写入计数)</span>

    <span class="token keyword">def</span> <span class="token function">sample_batch_data</span><span class="token punctuation">(</span>self<span class="token punctuation">,</span> batch_size<span class="token punctuation">)</span><span class="token punctuation">:</span>  <span class="token comment"># 抽取记忆库中的批数据</span>
        perm_idx <span class="token operator">=</span> np<span class="token punctuation">.</span>random<span class="token punctuation">.</span>choice<span class="token punctuation">(</span><span class="token builtin">len</span><span class="token punctuation">(</span>self<span class="token punctuation">.</span>memory<span class="token punctuation">)</span><span class="token punctuation">,</span> batch_size<span class="token punctuation">)</span>
        <span class="token keyword">return</span> self<span class="token punctuation">.</span>memory<span class="token punctuation">[</span>perm_idx<span class="token punctuation">]</span>

    <span class="token keyword">def</span> <span class="token function">learn</span><span class="token punctuation">(</span>self<span class="token punctuation">)</span> <span class="token operator">-</span><span class="token operator">&gt;</span> <span class="token builtin">float</span><span class="token punctuation">:</span>  <span class="token comment"># 定义学习函数(记忆库已满后便开始学习)</span>
        <span class="token comment"># 目标网络参数更新</span>
        <span class="token keyword">if</span> self<span class="token punctuation">.</span>learn_step <span class="token operator">%</span> TARGET_REPLACE_ITER <span class="token operator">==</span> <span class="token number">0</span><span class="token punctuation">:</span>  <span class="token comment"># 一开始触发，然后每100步触发</span>
            self<span class="token punctuation">.</span>target_net<span class="token punctuation">.</span>load_state_dict<span class="token punctuation">(</span>self<span class="token punctuation">.</span>evaluate_net<span class="token punctuation">.</span>state_dict<span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">)</span>  <span class="token comment"># 将评估网络的参数赋给目标网络</span>
        self<span class="token punctuation">.</span>learn_step <span class="token operator">+=</span> <span class="token number">1</span>  <span class="token comment"># 学习步数自加1</span>

        <span class="token comment"># 抽取32个索引对应的32个transition，存入batch_memory</span>
        batch_memory <span class="token operator">=</span> self<span class="token punctuation">.</span>sample_batch_data<span class="token punctuation">(</span>BATCH_SIZE<span class="token punctuation">)</span>
        <span class="token comment"># 将32个s抽出，转为32-bit floating point形式，并存储到batch_state中，batch_state为32行4列</span>
        batch_state <span class="token operator">=</span> torch<span class="token punctuation">.</span>FloatTensor<span class="token punctuation">(</span>batch_memory<span class="token punctuation">[</span><span class="token punctuation">:</span><span class="token punctuation">,</span> <span class="token punctuation">:</span>N_STATES<span class="token punctuation">]</span><span class="token punctuation">)</span>
        <span class="token comment"># 将32个a抽出，转为64-bit integer (signed)形式，并存储到batch_action中 (LongTensor类型方便后面torch.gather的使用)，batch_action为32行1列</span>
        batch_action <span class="token operator">=</span> torch<span class="token punctuation">.</span>LongTensor<span class="token punctuation">(</span>batch_memory<span class="token punctuation">[</span><span class="token punctuation">:</span><span class="token punctuation">,</span> N_STATES<span class="token punctuation">:</span> N_STATES <span class="token operator">+</span> <span class="token number">1</span><span class="token punctuation">]</span><span class="token punctuation">.</span>astype<span class="token punctuation">(</span><span class="token builtin">int</span><span class="token punctuation">)</span><span class="token punctuation">)</span>
        <span class="token comment"># 将32个r抽出，转为32-bit floating point形式，并存储到batch_reward中，batch_reward为32行1列</span>
        batch_reward <span class="token operator">=</span> torch<span class="token punctuation">.</span>FloatTensor<span class="token punctuation">(</span>batch_memory<span class="token punctuation">[</span><span class="token punctuation">:</span><span class="token punctuation">,</span> N_STATES <span class="token operator">+</span> <span class="token number">1</span><span class="token punctuation">:</span> N_STATES <span class="token operator">+</span> <span class="token number">2</span><span class="token punctuation">]</span><span class="token punctuation">)</span>
        <span class="token comment"># 将32个s_抽出，转为32-bit floating point形式，并存储到batch_next_state中，batch_next_state为32行4列</span>
        batch_next_state <span class="token operator">=</span> torch<span class="token punctuation">.</span>FloatTensor<span class="token punctuation">(</span>batch_memory<span class="token punctuation">[</span><span class="token punctuation">:</span><span class="token punctuation">,</span> <span class="token operator">-</span>N_STATES<span class="token punctuation">:</span><span class="token punctuation">]</span><span class="token punctuation">)</span>

        <span class="token comment"># 获取32个transition的评估值和目标值，并利用损失函数和优化器进行评估网络参数更新</span>
        q_eval <span class="token operator">=</span> self<span class="token punctuation">.</span>evaluate_net<span class="token punctuation">(</span>batch_state<span class="token punctuation">)</span><span class="token punctuation">.</span>gather<span class="token punctuation">(</span><span class="token number">1</span><span class="token punctuation">,</span> batch_action<span class="token punctuation">)</span>
        <span class="token comment"># eval_net(b_s)通过评估网络输出32行每个b_s对应的一系列动作值，然后.gather(1, b_a)代表对每行对应索引b_a的Q值提取进行聚合</span>
        q_next <span class="token operator">=</span> self<span class="token punctuation">.</span>target_net<span class="token punctuation">(</span>batch_next_state<span class="token punctuation">)</span><span class="token punctuation">.</span>detach<span class="token punctuation">(</span><span class="token punctuation">)</span>  <span class="token comment"># target network</span>
        <span class="token comment"># q_next不进行反向传递误差，所以detach；q_next表示通过目标网络输出32行每个b_s_对应的一系列动作值</span>
        q_target <span class="token operator">=</span> batch_reward <span class="token operator">+</span> GAMMA <span class="token operator">*</span> q_next<span class="token punctuation">.</span><span class="token builtin">max</span><span class="token punctuation">(</span><span class="token number">1</span><span class="token punctuation">)</span><span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span><span class="token punctuation">.</span>view<span class="token punctuation">(</span>BATCH_SIZE<span class="token punctuation">,</span> <span class="token number">1</span><span class="token punctuation">)</span>
        <span class="token comment"># q_next.max(1)[0]表示只返回每一行的最大值，不返回索引(长度为32的一维张量)；.view()表示把前面所得到的一维张量变成(BATCH_SIZE, 1)的形状；最终通过公式得到目标值</span>
        loss <span class="token operator">=</span> self<span class="token punctuation">.</span>loss_Function<span class="token punctuation">(</span>q_eval<span class="token punctuation">,</span> q_target<span class="token punctuation">)</span>

        <span class="token comment"># 输入32个评估值和32个目标值，使用均方损失函数</span>
        self<span class="token punctuation">.</span>optimizer<span class="token punctuation">.</span>zero_grad<span class="token punctuation">(</span><span class="token punctuation">)</span>  <span class="token comment"># 清空上一步的残余更新参数值</span>
        loss<span class="token punctuation">.</span>backward<span class="token punctuation">(</span><span class="token punctuation">)</span>  <span class="token comment"># 误差反向传播, 计算参数更新值</span>
        self<span class="token punctuation">.</span>optimizer<span class="token punctuation">.</span>step<span class="token punctuation">(</span><span class="token punctuation">)</span>  <span class="token comment"># 更新评估网络的所有参数</span>

        <span class="token keyword">return</span> loss<span class="token punctuation">.</span>data<span class="token punctuation">.</span>numpy<span class="token punctuation">(</span><span class="token punctuation">)</span>  <span class="token comment"># 返回损失函数数值</span>


<span class="token keyword">if</span> __name__ <span class="token operator">==</span> <span class="token string">"__main__"</span><span class="token punctuation">:</span>
    dqn <span class="token operator">=</span> DQN<span class="token punctuation">(</span><span class="token punctuation">)</span>

    writer <span class="token operator">=</span> SummaryWriter<span class="token punctuation">(</span><span class="token string">"run/MemoryCapacity_100_CustomReward/"</span><span class="token punctuation">)</span>
    writer<span class="token punctuation">.</span>add_graph<span class="token punctuation">(</span>dqn<span class="token punctuation">.</span>evaluate_net<span class="token punctuation">,</span> torch<span class="token punctuation">.</span>randn<span class="token punctuation">(</span><span class="token number">1</span><span class="token punctuation">,</span> N_STATES<span class="token punctuation">)</span><span class="token punctuation">)</span>

    global_step <span class="token operator">=</span> <span class="token number">0</span>  <span class="token comment"># 绘图横坐标</span>
    <span class="token keyword">for</span> i <span class="token keyword">in</span> <span class="token builtin">range</span><span class="token punctuation">(</span>EPOCH<span class="token punctuation">)</span><span class="token punctuation">:</span>  <span class="token comment"># episode循环</span>
        s <span class="token operator">=</span> env<span class="token punctuation">.</span>reset<span class="token punctuation">(</span><span class="token punctuation">)</span>  <span class="token comment"># 重置环境</span>
        running_loss <span class="token operator">=</span> <span class="token number">0</span>  <span class="token comment"># 损失函数值</span>
        cumulated_reward <span class="token operator">=</span> <span class="token number">0</span>  <span class="token comment"># 初始化该循环对应的episode的总奖励</span>
        step <span class="token operator">=</span> <span class="token number">0</span>

        <span class="token keyword">while</span> <span class="token boolean">True</span><span class="token punctuation">:</span>
            global_step <span class="token operator">+=</span> <span class="token number">1</span>
            env<span class="token punctuation">.</span>render<span class="token punctuation">(</span><span class="token punctuation">)</span>  <span class="token comment"># 显示实验动画</span>
            a <span class="token operator">=</span> dqn<span class="token punctuation">.</span>choose_action<span class="token punctuation">(</span>s<span class="token punctuation">)</span>  <span class="token comment"># 输入该步对应的状态s，选择动作</span>
            s_<span class="token punctuation">,</span> r<span class="token punctuation">,</span> done<span class="token punctuation">,</span> _ <span class="token operator">=</span> env<span class="token punctuation">.</span>step<span class="token punctuation">(</span>a<span class="token punctuation">)</span>  <span class="token comment"># 执行动作，获得反馈</span>

            <span class="token comment"># 修改奖励 (不修改也可以，修改奖励只是为了更快地得到训练好的摆杆)</span>
            x<span class="token punctuation">,</span> x_dot<span class="token punctuation">,</span> theta<span class="token punctuation">,</span> theta_dot <span class="token operator">=</span> s_
            r1 <span class="token operator">=</span> <span class="token punctuation">(</span>env<span class="token punctuation">.</span>x_threshold <span class="token operator">-</span> <span class="token builtin">abs</span><span class="token punctuation">(</span>x<span class="token punctuation">)</span><span class="token punctuation">)</span> <span class="token operator">/</span> env<span class="token punctuation">.</span>x_threshold <span class="token operator">-</span> <span class="token number">0.8</span>
            r2 <span class="token operator">=</span> <span class="token punctuation">(</span>env<span class="token punctuation">.</span>theta_threshold_radians <span class="token operator">-</span> <span class="token builtin">abs</span><span class="token punctuation">(</span>theta<span class="token punctuation">)</span><span class="token punctuation">)</span> <span class="token operator">/</span> env<span class="token punctuation">.</span>theta_threshold_radians <span class="token operator">-</span> <span class="token number">0.5</span>
            r <span class="token operator">=</span> r1 <span class="token operator">+</span> r2

            dqn<span class="token punctuation">.</span>store_transition<span class="token punctuation">(</span>s<span class="token punctuation">,</span> a<span class="token punctuation">,</span> r<span class="token punctuation">,</span> s_<span class="token punctuation">)</span>  <span class="token comment"># 存储样本</span>

            cumulated_reward <span class="token operator">+=</span> r  <span class="token comment"># 逐步加上一个episode内每个step的reward</span>
            <span class="token keyword">if</span> dqn<span class="token punctuation">.</span>point <span class="token operator">&gt;</span> MEMORY_CAPACITY<span class="token punctuation">:</span>  <span class="token comment"># 如果累计的transition数量超过了记忆库的固定容量2000</span>
                <span class="token comment"># 开始学习 (抽取记忆，即32个transition，并对评估网络参数进行更新，并在开始学习后每隔100次将评估网络的参数赋给目标网络)</span>
                loss <span class="token operator">=</span> dqn<span class="token punctuation">.</span>learn<span class="token punctuation">(</span><span class="token punctuation">)</span>
                running_loss <span class="token operator">+=</span> loss
                <span class="token keyword">if</span> done <span class="token keyword">or</span> step <span class="token operator">&gt;</span> <span class="token number">2000</span><span class="token punctuation">:</span>
                    <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">"【FAIL】Episode: %d| Step: %d| Loss:  %.4f, Reward: %.2f"</span> <span class="token operator">%</span> <span class="token punctuation">(</span>
                        i<span class="token punctuation">,</span> step<span class="token punctuation">,</span> running_loss <span class="token operator">/</span> step<span class="token punctuation">,</span> cumulated_reward<span class="token punctuation">)</span><span class="token punctuation">)</span>
                    writer<span class="token punctuation">.</span>add_scalar<span class="token punctuation">(</span><span class="token string">"training/Loss"</span><span class="token punctuation">,</span> running_loss <span class="token operator">/</span> step<span class="token punctuation">,</span> global_step<span class="token punctuation">)</span>
                    writer<span class="token punctuation">.</span>add_scalar<span class="token punctuation">(</span><span class="token string">"training/Reward"</span><span class="token punctuation">,</span> cumulated_reward<span class="token punctuation">,</span> global_step<span class="token punctuation">)</span>
                    <span class="token keyword">break</span>
            <span class="token keyword">else</span><span class="token punctuation">:</span>
                <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">"\rCollecting experience: %d / %d..."</span> <span class="token operator">%</span> <span class="token punctuation">(</span>dqn<span class="token punctuation">.</span>point<span class="token punctuation">,</span> MEMORY_CAPACITY<span class="token punctuation">)</span><span class="token punctuation">,</span> end<span class="token operator">=</span><span class="token string">''</span><span class="token punctuation">)</span>

            <span class="token keyword">if</span> done<span class="token punctuation">:</span>
                <span class="token keyword">break</span>
            <span class="token keyword">if</span> step <span class="token operator">%</span> <span class="token number">100</span> <span class="token operator">==</span> <span class="token number">99</span><span class="token punctuation">:</span>
                <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">"Episode: %d| Step: %d| Loss:  %.4f, Reward: %.2f"</span> <span class="token operator">%</span> <span class="token punctuation">(</span>
                    i<span class="token punctuation">,</span> step<span class="token punctuation">,</span> running_loss <span class="token operator">/</span> step<span class="token punctuation">,</span> cumulated_reward<span class="token punctuation">)</span><span class="token punctuation">)</span>
            step <span class="token operator">+=</span> <span class="token number">1</span>
            s <span class="token operator">=</span> s_
<span aria-hidden="true" class="line-numbers-rows"><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span
><span></span><span></span><span></span><span></span><span></span><span></span></span></code></pre>

                
            </div>
            <hr/>

            

    <div class="reprint" id="reprint-statement">
        
            <div class="reprint__author">
                <span class="reprint-meta" style="font-weight: bold;">
                    <i class="fas fa-user">
                        文章作者:
                    </i>
                </span>
                <span class="reprint-info">
                    <a href="/about" rel="external nofollow noreferrer">艾茜茜</a>
                </span>
            </div>
            <div class="reprint__type">
                <span class="reprint-meta" style="font-weight: bold;">
                    <i class="fas fa-link">
                        文章链接:
                    </i>
                </span>
                <span class="reprint-info">
                    <a href="http://wangjueya.github.io/practice-maze/">http://wangjueya.github.io/practice-maze/</a>
                </span>
            </div>
            <div class="reprint__notice">
                <span class="reprint-meta" style="font-weight: bold;">
                    <i class="fas fa-copyright">
                        版权声明:
                    </i>
                </span>
                <span class="reprint-info">
                    本博客所有文章除特别声明外，均采用
                    <a href="https://creativecommons.org/licenses/by/4.0/deed.zh" rel="external nofollow noreferrer" target="_blank">CC BY 4.0</a>
                    许可协议。转载请注明来源
                    <a href="/about" target="_blank">艾茜茜</a>
                    !
                </span>
            </div>
        
    </div>

    <script>
      // Whenever the visitor copies something on this page, show a toast that
      // reminds them of the reprint rules and offers a shortcut to the statement.
      // NOTE(review): `M` is presumably the Materialize global loaded elsewhere on the page - confirm.
      document.addEventListener("copy", function () {
        // The action element is a button, so it is closed with a matching button end tag;
        // type="button" keeps it a plain in-page action rather than a submit control.
        let toastHTML = '<span>复制成功，请遵循本文的转载规则</span><button type="button" class="btn-flat toast-action" onclick="navToReprintStatement()" style="font-size: smaller">查看</button>';
        M.toast({html: toastHTML})
      });

      /**
       * Smoothly scrolls the page to the reprint statement block.
       * The target position is the top of #reprint-statement minus an 80px
       * offset, and the animation lasts 800ms.
       */
      function navToReprintStatement() {
        const targetTop = $("#reprint-statement").offset().top - 80;
        const $page = $("html, body");
        $page.animate({scrollTop: targetTop}, 800);
      }
    </script>



            <div class="tag_share" style="display: block;">
                <div class="post-meta__tag-list" style="display: inline-block;">
                    
                        <div class="article-tag">
                            
                                <a href="/tags/DQN/">
                                    <span class="chip bg-color">DQN</span>
                                </a>
                            
                        </div>
                    
                </div>
                <div class="post_share" style="zoom: 80%; width: fit-content; display: inline-block; float: right; margin: -0.15rem 0;">
                    <link rel="stylesheet" type="text/css" href="/libs/share/css/share.min.css">
<div id="article-share">

    
    <div class="social-share" data-sites="twitter,facebook,google,qq,qzone,wechat,weibo,douban,linkedin" data-wechat-qrcode-helper="<p>微信扫一扫即可分享！</p>"></div>
    <script src="/libs/share/js/social-share.min.js"></script>
    

    

</div>

                </div>
            </div>
            
                <div id="reward">
    <a href="#rewardModal" class="reward-link modal-trigger btn-floating btn-medium waves-effect waves-light red">赏</a>

    <!-- Modal Structure -->
    <div id="rewardModal" class="modal">
        <div class="modal-content">
            <a class="close modal-close"><i class="fas fa-times"></i></a>
            <h4 class="reward-title">你的赏识是我前进的动力</h4>
            <div class="reward-content">
                <div class="reward-tabs">
                    <ul class="tabs row">
                        <li class="tab col s6 alipay-tab waves-effect waves-light"><a href="#alipay">支付宝</a></li>
                        <li class="tab col s6 wechat-tab waves-effect waves-light"><a href="#wechat">微 信</a></li>
                    </ul>
                    <div id="alipay">
                        <img src="/medias/reward/alipay.png" class="reward-img" alt="支付宝打赏二维码">
                    </div>
                    <div id="wechat">
                        <img src="/medias/reward/wechat.png" class="reward-img" alt="微信打赏二维码">
                    </div>
                </div>
            </div>
        </div>
    </div>
</div>

<script>
    // Once the DOM is ready, initialise every element with the "tabs" class
    // through the jQuery tabs() plugin.
    $(() => {
        const $tabs = $('.tabs');
        $tabs.tabs();
    });
</script>

            
        </div>
    </div>

    

    

    

    

    

    

    

    

    

<article id="prenext-posts" class="prev-next articles">
    <div class="row article-row">
        
        <div class="article col s12 m6" data-aos="fade-up">
            <div class="article-badge left-badge text-color">
                <i class="fas fa-chevron-left"></i>&nbsp;上一篇</div>
            <div class="card">
                <a href="/aiassistant/">
                    <div class="card-image">
                        
                        
                        <img src="/medias/featureimages/14.jpg" class="responsive-img" alt="AIAssistant">
                        
                        <span class="card-title">AIAssistant</span>
                    </div>
                </a>
                <div class="card-content article-content">
                    <div class="summary block-with-text">
                        
                            AIAssistant
                        
                    </div>
                    <div class="publish-info">
                        <span class="publish-date">
                            <i class="far fa-clock fa-fw icon-date"></i>2025-02-13
                        </span>
                        <span class="publish-author">
                            
                            <i class="fas fa-bookmark fa-fw icon-category"></i>
                            
                            <a href="/categories/07-Programming-Assistant/" class="post-category">
                                    07-Programming-Assistant
                                </a>
                            
                            
                        </span>
                    </div>
                </div>
                
                <div class="card-action article-tags">
                    
                    <a href="/tags/AI/">
                        <span class="chip bg-color">AI</span>
                    </a>
                    
                    <a href="/tags/Assistant/">
                        <span class="chip bg-color">Assistant</span>
                    </a>
                    
                </div>
                
            </div>
        </div>
        
        
        <div class="article col s12 m6" data-aos="fade-up">
            <div class="article-badge right-badge text-color">
                下一篇&nbsp;<i class="fas fa-chevron-right"></i>
            </div>
            <div class="card">
                <a href="/instruction/">
                    <div class="card-image">
                        
                        
                        <img src="/medias/featureimages/17.jpg" class="responsive-img" alt="DQN">
                        
                        <span class="card-title">DQN</span>
                    </div>
                </a>
                <div class="card-content article-content">
                    <div class="summary block-with-text">
                        
                            DQN instruction
                        
                    </div>
                    <div class="publish-info">
                            <span class="publish-date">
                                <i class="far fa-clock fa-fw icon-date"></i>2023-12-28
                            </span>
                        <span class="publish-author">
                            
                            <i class="fas fa-bookmark fa-fw icon-category"></i>
                            
                            <a href="/categories/05-Program-Design/" class="post-category">
                                    05-Program-Design
                                </a>
                            
                            <a href="/categories/05-Program-Design/Algorithm/" class="post-category">
                                    Algorithm
                                </a>
                            
                            <a href="/categories/05-Program-Design/Algorithm/DQN/" class="post-category">
                                    DQN
                                </a>
                            
                            
                        </span>
                    </div>
                </div>
                
                <div class="card-action article-tags">
                    
                    <a href="/tags/DQN/">
                        <span class="chip bg-color">DQN</span>
                    </a>
                    
                </div>
                
            </div>
        </div>
        
    </div>
</article>

</div>



<!-- 代码块功能依赖 -->
<script type="text/javascript" src="/libs/codeBlock/codeBlockFuction.js"></script>


  <!-- 是否加载使用自带的 prismjs. -->
  <script type="text/javascript" src="/libs/prism/prism.min.js"></script>


<!-- 代码语言 -->

<script type="text/javascript" src="/libs/codeBlock/codeLang.js"></script>


<!-- 代码块复制 -->

<script type="text/javascript" src="/libs/codeBlock/codeCopy.js"></script>


<!-- 代码块收缩 -->

<script type="text/javascript" src="/libs/codeBlock/codeShrink.js"></script>



    </div>
    <div id="toc-aside" class="expanded col l3 hide-on-med-and-down">
        <div class="toc-widget card" style="background-color: white;">
            <div class="toc-title"><i class="far fa-list-alt"></i>&nbsp;&nbsp;目录</div>
            <div id="toc-content"></div>
        </div>
    </div>
</div>

<!-- TOC 悬浮按钮. -->

<div id="floating-toc-btn" class="hide-on-med-and-down">
    <a class="btn-floating btn-large bg-color">
        <i class="fas fa-list-ul"></i>
    </a>
</div>


<script src="/libs/tocbot/tocbot.min.js"></script>
<script>
    // Runs once the DOM is ready. It builds the table of contents with tocbot,
    // pins the TOC widget after the page has been scrolled far enough, and wires
    // the floating button that expands / collapses the TOC sidebar.
    $(function () {
        const $window = $(window);

        // Headings are collected from #articleContent and rendered into #toc-content.
        const tocOptions = {
            tocSelector: '#toc-content',
            contentSelector: '#articleContent',
            headingsOffset: -($window.height() * 0.4 - 45),
            collapseDepth: Number('0'),
            headingSelector: 'h2, h3, h4'
        };
        tocbot.init(tocOptions);

        // Scroll threshold (40% of the viewport height minus 64px) beyond which
        // the widget carries the 'toc-fixed' class.
        const tocHeight = parseInt($window.height() * 0.4 - 64);
        const $tocWidget = $('.toc-widget');
        $window.on('scroll', () => {
            const scrolledPast = $window.scrollTop() > tocHeight;
            $tocWidget.toggleClass('toc-fixed', scrolledPast);
        });

        // Extra pixels added on top of the source width; wider sources get more.
        const extraWidthFor = (width) => {
            if (width >= 450) {
                return 21;
            }
            if (width >= 350) {
                return 18;
            }
            if (width >= 300) {
                return 16;
            }
            return 14;
        };

        // Copies the (padded) width of #srcId onto #targetId; does nothing when
        // the source element is not on the page.
        const fixPostCardWidth = (srcId, targetId) => {
            const $src = $('#' + srcId);
            if ($src.length === 0) {
                return;
            }

            const srcWidth = $src.width();
            $('#' + targetId).width(srcWidth + extraWidthFor(srcWidth));
        };

        // Toggle the TOC sidebar: the 'expanded' class tracks its state and the
        // main content only keeps the 'l9' column class while the sidebar is shown.
        const expandedClass = 'expanded';
        const $tocAside = $('#toc-aside');
        const $mainContent = $('#main-content');
        $('#floating-toc-btn .btn-floating').on('click', () => {
            const wasExpanded = $tocAside.hasClass(expandedClass);
            if (wasExpanded) {
                $tocAside.removeClass(expandedClass).hide();
            } else {
                $tocAside.addClass(expandedClass).show();
            }
            $mainContent.toggleClass('l9', !wasExpanded);
            fixPostCardWidth('artDetail', 'prenext-posts');
        });

    });
</script>

    

</main>




    <footer class="page-footer bg-color">
    
        <link rel="stylesheet" href="/libs/aplayer/APlayer.min.css">
<style>
    /* Lyric lines inside the player: hidden via display: none; the typography
       below only becomes visible if the lines are shown again. */
    .aplayer .aplayer-lrc p {
        
        display: none;
        
        font-size: 12px;
        font-weight: 700;
        line-height: 16px !important;
    }

    /* The currently highlighted lyric line: also hidden, with a larger green style. */
    .aplayer .aplayer-lrc p.aplayer-lrc-current {
        
        display: none;
        
        font-size: 15px;
        color: #42b983;
    }

    
    /* Fixed + narrow player: the body is shifted 66px to the left ... */
    .aplayer.aplayer-fixed.aplayer-narrow .aplayer-body {
        left: -66px !important;
    }

    /* ... and moves back to left: 0 while hovered. */
    .aplayer.aplayer-fixed.aplayer-narrow .aplayer-body:hover {
        left: 0px !important;
    }

    
</style>
<div class="">
    
    <div class="row">
        <meting-js class="col l8 offset-l2 m10 offset-m1 s12"
                   server="netease"
                   type="playlist"
                   id="503838841"
                   fixed='true'
                   autoplay='false'
                   theme='#42b983'
                   loop='all'
                   order='random'
                   preload='auto'
                   volume='0.7'
                   list-folded='true'
        >
        </meting-js>
    </div>
</div>

<script src="/libs/aplayer/APlayer.min.js"></script>
<script src="/libs/aplayer/Meting.min.js"></script>

    

    <div class="container row center-align"
         style="margin-bottom: 0px !important;">
        <div class="col s12 m8 l8 copy-right">
            Copyright&nbsp;&copy;
            
                <span id="year">2019-2025</span>
            
            <a href="/about" target="_blank">艾茜茜</a>
            |&nbsp;Powered by&nbsp;<a href="https://hexo.io/" target="_blank">Hexo</a>
            |&nbsp;Theme&nbsp;<a href="https://github.com/WANGJUEYA/hexo-theme-christmas-tree.git" target="_blank">christmas-tree</a>
            <br>
            
                &nbsp;<i class="fas fa-chart-area"></i>&nbsp;站点总字数:&nbsp;<span
                        class="white-color">67.8k</span>
            
            
            
                
            
            
                <span id="busuanzi_container_site_pv">
                &nbsp;|&nbsp;<i class="far fa-eye"></i>&nbsp;总访问量:&nbsp;
                    <span id="busuanzi_value_site_pv" class="white-color"></span>
            </span>
            
            
                <span id="busuanzi_container_site_uv">
                &nbsp;|&nbsp;<i class="fas fa-users"></i>&nbsp;总访问人数:&nbsp;
                    <span id="busuanzi_value_site_uv" class="white-color"></span>
            </span>
            
            <br>

            <!-- 运行天数提醒. -->
            
            <br>
            
        </div>
        <!-- Footer right column: icon-only contact / subscription links.
             Each link gets an aria-label mirroring its tooltip text, because the
             visible content is only an icon and data-tooltip is not an accessible
             name. The icons themselves are marked decorative. -->
        <div class="col s12 m4 l4 social-link social-statis">
            <!-- External site opened in a new tab: rel="noopener noreferrer" keeps the
                 opened page from reaching this one through window.opener. -->
            <a href="https://github.com/wangjueya" class="tooltipped" target="_blank" rel="noopener noreferrer" data-tooltip="访问我的GitHub" data-position="top" data-delay="50" aria-label="访问我的GitHub">
                <i class="fab fa-github" aria-hidden="true"></i>
            </a>

            <a href="mailto:573711282@qq.com" class="tooltipped" target="_blank" data-tooltip="邮件联系我" data-position="top" data-delay="50" aria-label="邮件联系我">
                <i class="fas fa-envelope-open" aria-hidden="true"></i>
            </a>

            <a href="tencent://AddContact/?fromId=50&fromSubId=1&subcmd=all&uin=573711282" class="tooltipped" target="_blank" data-tooltip="QQ联系我: 573711282" data-position="top" data-delay="50" aria-label="QQ联系我: 573711282">
                <i class="fab fa-qq" aria-hidden="true"></i>
            </a>

            <a href="/atom.xml" class="tooltipped" target="_blank" data-tooltip="RSS 订阅" data-position="top" data-delay="50" aria-label="RSS 订阅">
                <i class="fas fa-rss" aria-hidden="true"></i>
            </a>
        </div>
    </div>
</footer>

<div class="progress-bar"></div>


    <!-- Search overlay (modal). The inline script that follows wires #searchInput
         to #searchResult; both ids must stay as they are. -->
<div id="searchModal" class="modal">
    <div class="modal-content">
        <div class="search-header">
            <span class="title"><i class="fas fa-search" aria-hidden="true"></i>&nbsp;&nbsp;搜索</span>
            <!-- A placeholder is not an accessible label, so the field is also
                 named explicitly through aria-label. -->
            <input type="search" id="searchInput" name="s" placeholder="请输入搜索的关键字"
                   aria-label="搜索"
                   class="search-input">
        </div>
        <div id="searchResult"></div>
    </div>
</div>

<script>
$(function () {
    'use strict';

    // Characters that must be escaped before plain text is placed into HTML
    // markup or a quoted attribute value.
    var HTML_ESCAPES = {'&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;'};

    /** Escape plain text so it can be inserted into HTML or a quoted attribute. */
    function escapeHtml(text) {
        return String(text).replace(/[&<>"']/g, function (ch) {
            return HTML_ESCAPES[ch];
        });
    }

    /** Escape regular-expression metacharacters so user input is matched literally. */
    function escapeRegExp(text) {
        return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    }

    /**
     * Build a function that wraps every occurrence of any keyword in
     * <em class="search-keyword">.
     *
     * All keywords are combined into ONE case-insensitive pattern and applied in
     * a single pass, so a later keyword can never match inside the <em> markup
     * produced for an earlier one. The matched text itself is wrapped (not the
     * typed keyword), which keeps the original letter case and means user input
     * is never written into the result markup.
     */
    function buildHighlighter(keywords) {
        var alternation = keywords.slice()
            // Longest first so "foobar" wins over "foo" at the same position.
            .sort(function (a, b) { return b.length - a.length; })
            .map(escapeRegExp)
            .join('|');
        var pattern = new RegExp('(' + alternation + ')', 'gi');
        return function (text) {
            // split() with one capture group yields [plain, match, plain, match, ...].
            return text.split(pattern).map(function (part, i) {
                return i % 2 === 1 ? '<em class="search-keyword">' + part + '</em>' : part;
            }).join('');
        };
    }

    /**
     * Load the search index and wire up live local search.
     *
     * @param {string} path       URL of the XML search index.
     * @param {string} search_id  id of the text input the user types into.
     * @param {string} content_id id of the element that receives the result list.
     */
    var searchFunc = function (path, search_id, content_id) {
        $.ajax({
            url: path,
            dataType: "xml",
            success: function (xmlResponse) {
                // Normalise every entry once, instead of on each keystroke.
                var entries = $("entry", xmlResponse).map(function () {
                    var title = $("title", this).text().trim();
                    // Content is stripped of tags but otherwise left as HTML text
                    // (entities intact), exactly as before; it is therefore inserted
                    // without a second round of escaping further down.
                    var text = $("content", this).text().trim().replace(/<[^>]+>/g, "");
                    var url = $("url", this).text();
                    return {
                        title: title,
                        titleLower: title.toLowerCase(),
                        text: text,
                        textLower: text.toLowerCase(),
                        url: url.indexOf('/') === 0 ? url : '/' + url
                    };
                }).get();

                var $input = document.getElementById(search_id);
                var $resultContent = document.getElementById(content_id);
                if (!$input || !$resultContent) {
                    // Nothing to bind to on this page.
                    return;
                }

                $input.addEventListener('input', function () {
                    $resultContent.innerHTML = "";

                    // Drop empty fragments: "foo-" or "foo  " must not yield a ""
                    // keyword, which would match every entry and every position.
                    var keywords = this.value.trim().toLowerCase().split(/[\s\-]+/)
                        .filter(function (keyword) { return keyword !== ''; });
                    if (keywords.length === 0) {
                        return;
                    }

                    var highlight = buildHighlighter(keywords);
                    var str = '<ul class="search-result-list">';

                    entries.forEach(function (entry) {
                        // Only match articles with a non-empty title and content.
                        if (entry.titleLower === '' || entry.textLower === '') {
                            return;
                        }

                        // Position (in the content) of the first keyword; 0 when that
                        // keyword only occurs in the title.
                        var firstOccur = 0;
                        var isMatch = keywords.every(function (keyword, i) {
                            var indexTitle = entry.titleLower.indexOf(keyword);
                            var indexContent = entry.textLower.indexOf(keyword);
                            if (indexTitle < 0 && indexContent < 0) {
                                return false;
                            }
                            if (i === 0) {
                                firstOccur = indexContent < 0 ? 0 : indexContent;
                            }
                            return true;
                        });
                        if (!isMatch) {
                            return;
                        }

                        // Title and URL are plain text from the index: escape both, and
                        // show the title in its original letter case.
                        str += "<li><a href='" + escapeHtml(entry.url) + "' class='search-result-title'>" +
                            escapeHtml(entry.title) + "</a>";

                        // Cut out a 100-character excerpt starting up to 20 characters
                        // before the first hit. substring() takes (start, end); the old
                        // substr(start, end) treated `end` as a length and over-read.
                        var start = Math.max(firstOccur - 20, 0);
                        var end = Math.min(start + 100, entry.text.length);
                        var excerpt = highlight(entry.text.substring(start, end));

                        str += "<p class=\"search-result\">" + excerpt + "...</p>";
                        str += "</li>";
                    });

                    str += "</ul>";
                    $resultContent.innerHTML = str;
                });
            }
        });
    };

    searchFunc('/search.xml', 'searchInput', 'searchResult');
});
</script>

    <!-- Day / night theme.
         Empty placeholder layers; presumably styled from the theme's CSS as the
         night-mode star field (the stylesheet is not visible here; confirm). -->
<div class="stars-con">
    <div id="stars"></div>
    <div id="stars2"></div>
    <div id="stars3"></div>  
</div>

<script>
    /**
     * Toggle the page between day and night appearance.
     *
     * Appends a transition overlay to <body>, then (on the next timer tick)
     * flips the `DarkMode` class on <body>, stores the choice in localStorage
     * under `isDark` ('1' = dark, '0' = light) and swaps the sun/moon icon
     * class on #sum-moon-icon. Two seconds later every overlay is faded out
     * over one second and removed.
     */
    function switchNightMode() {
        var $page = $('body');
        var overlayMarkup = '<div class="Cuteen_DarkSky"><div class="Cuteen_DarkPlanet"></div></div>';

        $(overlayMarkup).appendTo($page);

        setTimeout(function () {
            var $toggleIcon = $('#sum-moon-icon');

            if ($page.hasClass('DarkMode')) {
                // Currently dark: go back to the light theme.
                $page.removeClass('DarkMode');
                localStorage.setItem('isDark', '0');
                $toggleIcon.removeClass("fa-sun").addClass('fa-moon');
            } else {
                // Currently light: switch to the dark theme.
                $page.addClass('DarkMode');
                localStorage.setItem('isDark', '1');
                $toggleIcon.addClass("fa-sun").removeClass('fa-moon');
            }

            // Leave the overlay up for 2 s, then fade it out over 1 s and drop it.
            setTimeout(function () {
                $('.Cuteen_DarkSky').fadeOut(1000, function () {
                    $(this).remove();
                });
            }, 2000);
        });
    }
</script>

    <!-- Back-to-top button. The link contains only an icon, so it is given an
         explicit accessible name and the icon is marked decorative. -->
<div id="backTop" class="top-scroll">
    <a class="btn-floating btn-large waves-effect waves-light" href="#!" aria-label="回到顶部">
        <i class="fas fa-arrow-up" aria-hidden="true"></i>
    </a>
</div>


    <script src="/libs/materialize/materialize.min.js"></script>
    <script src="/libs/masonry/masonry.pkgd.min.js"></script>
    <script src="/libs/aos/aos.js"></script>
    <script src="/libs/scrollprogress/scrollProgress.min.js"></script>
    <script src="/libs/lightGallery/js/lightgallery-all.min.js"></script>
    <script src="/js/matery.js"></script>

    
        <!-- <script src='https://unpkg.com/mermaid@latest/dist/mermaid.min.js'></script> -->
        <script src='/libs/mermaid/mermaid.min.js'></script>
        <script>
          // Configure Mermaid with the "forest" theme, but only if the library
          // above actually defined the global (it may have failed to load).
          (function (lib) {
            if (!lib) {
              return;
            }
            lib.initialize({theme: 'forest'});
          })(window.mermaid);
        </script>
    

    

    <!-- 雪花特效 -->
    

    <!-- 鼠标星星特效 -->
    

     
        <script src="https://ssl.captcha.qq.com/TCaptcha.js"></script>
        <script src="/libs/others/TencentCaptcha.js"></script>
        <button id="TencentCaptcha" data-appid="xxxxxxxxxx" data-cbfn="callback" type="button" hidden></button>
    

    <!-- Baidu Analytics -->

    <!-- Baidu Push -->

<script>
    // Baidu push loader: creates a <script> element for Baidu's push.js and
    // inserts it in front of the first <script> in the document.
    (function () {
        var bp = document.createElement('script');
        // Always fetch over HTTPS. The previous code fell back to a plain
        // http:// URL whenever the page itself was not served over https,
        // which let anyone on the network path tamper with the injected script.
        // The HTTPS endpoint can be loaded from http pages as well.
        bp.src = 'https://zz.bdstatic.com/linksubmit/push.js';
        var s = document.getElementsByTagName("script")[0];
        s.parentNode.insertBefore(bp, s);
    })();
</script>

    
    <script src="/libs/others/clicklove.js" async="async"></script>
    
    
    <script async src="/libs/others/busuanzi.pure.mini.js"></script>
    

    

    

    <!--腾讯兔小巢-->
    
    

    

    

    
    <script src="/libs/instantpage/instantpage.js" type="module"></script>
    

</body>

</html>
